# Simulate a multi-locus coalescent process with heterochronous sampling

# Assumptions and modifications
# - focuses on bottleneck and cyclic trajectories
# - generates multiple conditionally independent trees (data)
# - deposits multiple trees in a single folder
# - samples placed uniformly across time period


# Reset the session before running: close open connections, empty the
# workspace, emit a form feed to clean the console, and close all graphics
# devices
closeAllConnections()
rm(list = ls())
cat("\f")
graphics.off()

# Packages for phylodyn
library("sp")
library("devtools")
library("INLA")
library("spam")
library("ape")
library("phylodyn")

# Set working directory to source
# `ofile` only exists in the calling frame when this file is run through
# source(); this lookup must stay at top level because it depends on the
# call stack. Under Rscript the path is taken from the --file= argument
# instead, and if neither is available the current directory is kept.
script_path <- parent.frame(2)$ofile
if (!is.character(script_path)) {
  script_path <- sub(
    "^--file=", "",
    grep("^--file=", commandArgs(trailingOnly = FALSE), value = TRUE)
  )
}
this.dir <- if (length(script_path) > 0) dirname(script_path[1]) else getwd()
# Make the path absolute BEFORE changing directory: a relative `this.dir`
# would resolve against the new working directory once setwd() has run,
# breaking every later path built from it.
this.dir <- normalizePath(this.dir, winslash = "/", mustWork = TRUE)
rm(script_path)
setwd(this.dir)

# Write `val` as a bare comma-separated file (no row names, no column names).
# The destination is the plain concatenation of `pathname` and `name`, so
# `pathname` has to carry its own trailing separator.
tableWrite <- function(val, name, pathname) {
  write.table(
    val,
    file = paste0(c(pathname, name), collapse = ""),
    sep = ",",
    row.names = FALSE,
    col.names = FALSE
  )
}

# Middling bottleneck trajectory, piecewise constant in time `t`:
# the value is 20 strictly between t = 15 and t = 40 and 500 everywhere
# else (both boundaries included in the 500 regime). Entries whose
# comparison yields NA are left at 0. Returns a numeric vector with one
# value per element of `t`.
bottle_traj <- function(t) {
  pop_size <- numeric(length(t))
  in_bottleneck <- t > 15 & t < 40
  pop_size[in_bottleneck] <- 20
  pop_size[!in_bottleneck] <- 500
  pop_size
}

# Main code for heterochronous simulations ----------------------------------------------------------

# Number of loci, i.e. how many trees are simulated
numLoci <- 6
# Trajectory selector: 1 picks the cyclic trajectory, any other value picks
# the bottleneck. `trajVal` is the matching label used for the output folder.
trajCase <- 2
if (trajCase != 1) {
  traj <- bottle_traj
  trajVal <- "bottleLoci"
} else {
  traj <- cyclic_traj
  trajVal <- "cyclicLoci"
}

# Uniform sampling across time: `nsamps` samples are introduced at `ndivs`
# equally spaced times between 0 and `all_samp_end`
all_samp_end <- 48
nsamps <- 801
ndivs <- 20
# Samples per time point: every point gets the integer share of nsamps and
# the remainder is added to the last one, so the counts sum to nsamps
samps <- rep(nsamps %/% ndivs, ndivs) + c(rep(0, ndivs - 1), nsamps %% ndivs)
# Times at which each group of samples is introduced
samp_times <- seq(from = 0, to = all_samp_end, length.out = ndivs)

# Results for this trajectory go into their own folder next to the script,
# named "<trajVal>_<nsamps - 1>"; `pathf` is that folder's path with a
# trailing slash, ready to be prefixed to file names
trajName <- paste0(trajVal, "_", nsamps - 1)
pathf <- paste0(this.dir, "/", trajName, "/")
dir.create(file.path(this.dir, trajName))

# Per-locus summaries, filled in by the loop below: number of coalescent
# events (nc) and largest coalescent time (tmax)
nc <- numeric(numLoci)
tmax <- numeric(numLoci)

# One simulated genealogy per locus. seq_len() is used so that numLoci = 0
# performs no iterations (1:0 would iterate over 1 and 0).
for (i in seq_len(numLoci)) {

  # Simulate genealogy and get all times
  gene <- coalsim(
    samp_times = samp_times, n_sampled = samps, traj = traj,
    lower_bound = 10, method = "thin"
  )
  coal_times <- gene$coal_times
  coalLin <- gene$lineages

  # TMRCA and no. coalescent events
  tmax[i] <- max(coal_times)
  nc[i] <- length(coal_times)

  # Export trajectory specific data for Matlab, one pair of files per locus
  tableWrite(coal_times, paste0("coaltimes", i, ".csv"), pathf)
  tableWrite(coalLin, paste0("coalLin", i, ".csv"), pathf)
}

# Collapse the per-locus coalescent event counts to their distinct values
# and store them
nc <- unique(nc)
tableWrite(nc, "nc.csv", pathf)
# Number of loci
tableWrite(numLoci, "numLoci.csv", pathf)

# Sample scheme: introduction times and number of samples at each time
tableWrite(samp_times, "samptimes.csv", pathf)
tableWrite(samps, "sampIntro.csv", pathf)

# True population size, evaluated on a grid of 20000 points from 0 to the
# largest simulated coalescent time across loci
t <- seq(from = 0, to = max(tmax), length.out = 20000)
y <- traj(t)
tableWrite(t, "trajt.csv", pathf)
tableWrite(y, "trajy.csv", pathf)
